Restaurant Inspections (Delaware)¶

Mitch Fawcett November 9, 2023

This is an interactive map showing the results of health inspections of food prep and serving establishments in Delaware

In [1]:
%%capture --no-display  
# Previous line supresses a warning about package versioning

# Load libraries
import pandas as pd   # for dataframe manipulation
import numpy as np  # for numerical analysis
import matplotlib.pyplot as plt   # for generating plots and graphs
from matplotlib.pyplot import figure  # for modifying appearance of plots & graphs
import requests   # to make http post requests to the US Census geocoder
import io  # for working with I/O streams and allow conversion of geocode response to dataframe 
import csv  # reading/writing csv files
import pickle as pk # to store and retrieve dataframes on disk
import csv  # to read text files
import requests # to make http requests for data using census web API
import os   # to list contents of disk drive folders
import sys  # for managing system options
import folium  # the mapping package
from folium import plugins  # to allow cluster markers on maps
import seaborn as sns   # for fancy plotting
from IPython.display import Markdown as md  # for embedding variables in markdown cells
from IPython.display import Image   # to embeg jpg images in notebook
from datetime import datetime
In [2]:
# Settings to improve the display of tabular results
pd.set_option('max_colwidth', 4000)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
np.set_printoptions(threshold = sys.maxsize)

IMPORT THE RAW DATASET FROM DELAWARE.GOV¶

In [3]:
# Import inspection data into a dataframe.  Data  provided by Data.Delaware.Gov Website
# https://data.delaware.gov/Health/Restaurant-Inspection-Violations/384s-wygj

inspections_df = pd.read_table('Restaurant_Inspection_Violations_20231105.tsv', delimiter = '\t')

# Replaces spaces with '_' in column names
inspections_df.columns = [c.replace(' ', '_') for c in inspections_df.columns]

# Convert the inspection date to a real date value so they can be sorted later
inspections_df['Inspection_Date']= pd.to_datetime(inspections_df['Inspection_Date'])
In [4]:
# Inspect the raw data
inspections_df.head()
Out[4]:
Food_Establishment_Name Food_Establishment_Street_Address Food_Establishment_City Food_Establishment_Zip_Code Inspection_Date Inspection_Type Violation_Code Violation_Description Geocoded_Location
0 1 In Town African Restaurant 240 S Dupont Hwy Suite # 300 New Castle 19720.0 2022-03-04 Routine NaN NaN 240 S Dupont Hwy Suite # 300\nNew Castle, DE 19720\n(39.653333979, -75.616564004)
1 1000 Degree Pizza 4526 New Linden Hill Road Wilmington 19808.0 2022-07-08 Routine 2-102.12 Certified Food Protection Manager 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456)
2 1000 Degree Pizza 4526 New Linden Hill Road Wilmington 19808.0 2022-07-08 Routine 5-202.12 Handwashing Sinks, Installation 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456)
3 1000 Degree Pizza 4526 New Linden Hill Road Wilmington 19808.0 2022-07-08 Routine 6-301.12 Hand Drying Provision 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456)
4 1000 Degree Pizza 4526 New Linden Hill Road Wilmington 19808.0 2022-07-08 Routine 3-501.16 Time/Temperature Control for Safety Food, Hot and Cold Holding 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456)

CREATE COLUMNS FOR LONGITUDE & LATITUDE OF ESTABLISHMENTS¶

In [5]:
# Locations will be mapped using the longitude/latitude values in the 'Geocoded_Location column'.
# These numbers need to be extracted out of Geocoded_Location and added as a new column.

# Split the Geocoded_Location column on newline character \n
coordinates_df = inspections_df.Geocoded_Location.str.split(pat="\n", expand=True)

# Give the cooridnates column a better name than '2'
coordinates_df.rename(columns={0:'col_1', 1:'col_2', 2:'Lat_Long'},inplace=True)

# Inspect the result
coordinates_df["Lat_Long"].head()
Out[5]:
0    (39.653333979, -75.616564004)
1    (39.737731167, -75.690144456)
2    (39.737731167, -75.690144456)
3    (39.737731167, -75.690144456)
4    (39.737731167, -75.690144456)
Name: Lat_Long, dtype: object
In [6]:
# Add it as another column to the dataset
inspections_df['Lat_Long'] = coordinates_df["Lat_Long"]
In [7]:
# Separate the cooordinate pairs into two columns for Longitude and Latitude
inspections_df[['Latitude', 'Longitude']] = inspections_df['Lat_Long'].str.rsplit(', ', expand=True)

# Get rid of '(' and ')' characters in the new columns
inspections_df['Latitude'] = inspections_df['Latitude'].str.replace('(','')
inspections_df['Longitude'] = inspections_df['Longitude'].str.replace(')','')
In [8]:
# Inspect the data now has a dedicated column for coordinates
inspections_df.head()
Out[8]:
Food_Establishment_Name Food_Establishment_Street_Address Food_Establishment_City Food_Establishment_Zip_Code Inspection_Date Inspection_Type Violation_Code Violation_Description Geocoded_Location Lat_Long Latitude Longitude
0 1 In Town African Restaurant 240 S Dupont Hwy Suite # 300 New Castle 19720.0 2022-03-04 Routine NaN NaN 240 S Dupont Hwy Suite # 300\nNew Castle, DE 19720\n(39.653333979, -75.616564004) (39.653333979, -75.616564004) 39.653333979 -75.616564004
1 1000 Degree Pizza 4526 New Linden Hill Road Wilmington 19808.0 2022-07-08 Routine 2-102.12 Certified Food Protection Manager 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) (39.737731167, -75.690144456) 39.737731167 -75.690144456
2 1000 Degree Pizza 4526 New Linden Hill Road Wilmington 19808.0 2022-07-08 Routine 5-202.12 Handwashing Sinks, Installation 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) (39.737731167, -75.690144456) 39.737731167 -75.690144456
3 1000 Degree Pizza 4526 New Linden Hill Road Wilmington 19808.0 2022-07-08 Routine 6-301.12 Hand Drying Provision 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) (39.737731167, -75.690144456) 39.737731167 -75.690144456
4 1000 Degree Pizza 4526 New Linden Hill Road Wilmington 19808.0 2022-07-08 Routine 3-501.16 Time/Temperature Control for Safety Food, Hot and Cold Holding 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) (39.737731167, -75.690144456) 39.737731167 -75.690144456

PREPARE A LIST OF ESTABLISHMENT NAMES AND LOCATIONS¶

In [9]:
# Create a dataframe that contains a unique list of food establishment names and coordinates.
# Location values repeat in the dataset for each inspection & violation.  Need to get the
# unique values so only one marker is added to the map for each food establishment.
unique_locations_df = inspections_df[['Food_Establishment_Name', 'Latitude', 'Longitude']].drop_duplicates()
In [10]:
# Review some results
unique_locations_df.head()
Out[10]:
Food_Establishment_Name Latitude Longitude
0 1 In Town African Restaurant 39.653333979 -75.616564004
1 1000 Degree Pizza 39.737731167 -75.690144456
8 11-Jul 39.178200014 -75.527233032
11 13 Wingz 38.645889987 -75.625000976
15 1300 Bar & Restaurant 39.748308989 -75.545611026

COUNT NUMBER OF VIOLATIONS FOR EACH INSPECTION¶

In [11]:
# Create a dataset that has the count of violations for each inspection
location_inspection_df = inspections_df[['Food_Establishment_Name', 'Inspection_Date', 'Violation_Code']].drop_duplicates()
grouped_df = location_inspection_df.groupby(['Food_Establishment_Name', 'Inspection_Date'])
inspection_violation_counts_df = grouped_df.count()

# Make the Food_Establishment_Name and Inspection_Date columns regular columns, not indexes
inspection_violation_counts_df = inspection_violation_counts_df.reset_index()

# Change a column name
inspection_violation_counts_df.rename(columns = {'Violation_Code': 'Violation_Count'}, inplace = True)
In [12]:
inspection_violation_counts_df.head()
Out[12]:
Food_Establishment_Name Inspection_Date Violation_Count
0 1 In Town African Restaurant 2022-03-04 0
1 1000 Degree Pizza 2022-07-08 5
2 1000 Degree Pizza 2022-08-01 1
3 1000 Degree Pizza 2022-08-11 0
4 11-Jul 2021-11-18 1

COUNT NUMBER OF INSPECTIONS FOR EACH ESTABLISHMENT¶

In [13]:
# Count rows in prior result grouping by establishment name to get the count of inspection dates
establishment_inspection_counts_df = inspection_violation_counts_df.groupby(['Food_Establishment_Name']).count().reset_index()
establishment_inspection_counts_df.rename(columns = {'Inspection_Date': 'Inspection_Count'}, inplace = True)
establishment_inspection_counts_df = establishment_inspection_counts_df.drop(columns=['Violation_Count'])
In [14]:
establishment_inspection_counts_df.head(10)
Out[14]:
Food_Establishment_Name Inspection_Count
0 1 In Town African Restaurant 1
1 1000 Degree Pizza 3
2 11-Jul 2
3 13 Wingz 2
4 1300 Bar & Restaurant 7
5 14 Global LLC 2
6 1776 Restaurant 2
7 1857 Jackson House 3
8 1861 2
9 2 Angel's Ice Cream 3

TOTAL NUMBER OF VIOLATIONS FOR EACH ESTABLISHMENT¶

In [15]:
# Use previous result to calculate the total number of violations for each establishment
establishment_violation_counts_df = inspection_violation_counts_df.groupby(['Food_Establishment_Name'])['Violation_Count'].sum().reset_index()

establishment_violation_counts_df.rename(columns = {'Violation_Count': 'Total_Violation_Count'}, inplace = True)
In [16]:
establishment_violation_counts_df.head()
Out[16]:
Food_Establishment_Name Total_Violation_Count
0 1 In Town African Restaurant 0
1 1000 Degree Pizza 6
2 11-Jul 3
3 13 Wingz 3
4 1300 Bar & Restaurant 25

GET THE DATE OF THE MOST RECENT INSPECTION FOR EACH ESTABLISHMENT¶

In [17]:
# Find the most recent inspection date for each establishment

# Get a list of all inspection dates for each estalishment
inspection_dates_df = inspections_df[['Food_Establishment_Name', 'Inspection_Date']].drop_duplicates()

# Get the most recent inspection date for each place.
latest_inspection_date_df = inspection_dates_df.loc[inspection_dates_df
        .groupby('Food_Establishment_Name').Inspection_Date.idxmax()]

# Change the date column name
latest_inspection_date_df.rename(columns = {'Inspection_Date': 'Latest_Inspection_Date'}, inplace = True)
In [18]:
latest_inspection_date_df.head()
Out[18]:
Food_Establishment_Name Latest_Inspection_Date
0 1 In Town African Restaurant 2022-03-04
7 1000 Degree Pizza 2022-08-11
9 11-Jul 2023-05-26
12 13 Wingz 2023-02-08
36 1300 Bar & Restaurant 2023-08-10

NUMBER OF VIOLATIONS IN THE MOST RECENT INSPECTION FOR EACH ESTABLISHMENT¶

In [19]:
latest_inspection_violations_df = latest_inspection_date_df.merge(
    inspection_violation_counts_df,
    left_on=['Latest_Inspection_Date', 'Food_Establishment_Name'],
    right_on=['Inspection_Date', 'Food_Establishment_Name'],
    how='inner')
In [20]:
latest_inspection_violations_df.head(10)
Out[20]:
Food_Establishment_Name Latest_Inspection_Date Inspection_Date Violation_Count
0 1 In Town African Restaurant 2022-03-04 2022-03-04 0
1 1000 Degree Pizza 2022-08-11 2022-08-11 0
2 11-Jul 2023-05-26 2023-05-26 2
3 13 Wingz 2023-02-08 2023-02-08 3
4 1300 Bar & Restaurant 2023-08-10 2023-08-10 4
5 14 Global LLC 2023-09-01 2023-09-01 2
6 1776 Restaurant 2022-08-31 2022-08-31 5
7 1857 Jackson House 2023-08-10 2023-08-10 2
8 1861 2023-02-10 2023-02-10 1
9 2 Angel's Ice Cream 2022-07-20 2022-07-20 2

IDENTIFY ESTABLISHMENTS THAT HAD NO VIOLATIONS IN THEIR LATEST INSPECTION¶

In [21]:
# Find places that had no violations on most recent inspection.
latest_inspection_no_violations_df = latest_inspection_violations_df.loc[latest_inspection_violations_df['Violation_Count'] == 0]    
In [22]:
latest_inspection_no_violations_df.head()
Out[22]:
Food_Establishment_Name Latest_Inspection_Date Inspection_Date Violation_Count
0 1 In Town African Restaurant 2022-03-04 2022-03-04 0
1 1000 Degree Pizza 2022-08-11 2022-08-11 0
11 2S Pizza Company 2023-02-07 2023-02-07 0
12 2nd To None 2023-09-13 2023-09-13 0
13 3 Countries Pizzeria 2023-05-18 2023-05-18 0

CREATE SOME FUNCTIONS TO SET MAP MARKER APPEARANCE¶

In [23]:
# Create a function that sets a marker icon based on inspection results
# If the establishment had no violations in its most recent inspection, use "smile" icon,
# Otherwise use 'frown'
def get_status_icon(location_name):
    if location_name in latest_inspection_no_violations_df['Food_Establishment_Name'].values:
        return 'smile'
    else:
        return 'frown'
In [24]:
# Create a function that retuns the most recent inspection date
def get_latest_inspection_date(location_name):
    latest_date_df = latest_inspection_date_df.loc[latest_inspection_date_df['Food_Establishment_Name'] == location_name]
    return str(latest_date_df['Latest_Inspection_Date'].item())[:10] 
In [25]:
# Create a function that returns the total number of inspections for the establishment
def inspection_count(location_name):
    try:
        inspect_count_df = establishment_inspection_counts_df.loc[establishment_inspection_counts_df['Food_Establishment_Name'] == location_name]    
        return str(inspect_count_df['Inspection_Count'].item())
    except:
        return 'unknown'
In [26]:
# Create a function that returns the number of violations to-date for the establishment
def violations_total(location_name):
    try:
        violation_count_df = establishment_violation_counts_df.loc[establishment_violation_counts_df['Food_Establishment_Name'] == location_name]    
        return str(violation_count_df['Total_Violation_Count'].item())
    except:
        return 'unknown'
In [27]:
# Create a function that returns the number of violations from the most recent inspection 
# for the establishment
def violations_recent(location_name):
    try:
        violation_count_df = latest_inspection_violations_df.loc[latest_inspection_violations_df['Food_Establishment_Name'] == location_name]    
        return str(violation_count_df['Violation_Count'].item())
    except:
        return 'unknown'
In [28]:
# Start a map of Delaware centered on Dover
# 39.1582°, -75.5244°  # coordinates of Dover, DE
# 39.620198 -75.670477  # coordinates of Bear, DE
mp = folium.Map(location=[39.620198, -75.670477], zoom_start=14, width=900, height=550, control_scale=True)
# mp is the map object.  I will next add a marker to it for each food location.
In [29]:
# Add the markers to the map
for index, row in unique_locations_df.iterrows():
    # print(row['Food_Establishment_Name'],row['Latitude'], row['Longitude'])
    # Get status icon
    icon_name = get_status_icon(row['Food_Establishment_Name'])
    total_inspections = inspection_count(row['Food_Establishment_Name'])
    total_violations = violations_total(row['Food_Establishment_Name'])
    recent_violations = violations_recent(row['Food_Establishment_Name'])
    latest_inspection_date = get_latest_inspection_date(row['Food_Establishment_Name'])
    location_name = row['Food_Establishment_Name']
    try:
        folium.Marker(
            location = [row['Latitude'], row['Longitude']],
            popup = row['Food_Establishment_Name'],
            icon = folium.Icon(color = 'blue', 
                               icon_color = 'yellow', 
                               icon = icon_name, 
                               prefix = 'fa'),
            tooltip = location_name[0:20]
            + '<br>' + 'Number of inspections: ' + str(total_inspections)
            + '<br>' + 'Latest inspection date: ' + str(latest_inspection_date)
            + '<br>' + 'Total violations: ' + str(total_violations)
            + '<br>' + 'Recent violations; ' + str(recent_violations)
        ).add_to(mp)
    except: # not all locations are geocoded in the dataset, skip them
        print(row['Food_Establishment_Name'], ' No coordinates available')
        pass
Aspira Academy High School  No coordinates available
Eat BAP Mobile Unit  No coordinates available
Newark Charter School  No coordinates available
In [30]:
# Display the map
mp
Out[30]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]: